Preprocessing

library(tidyverse)
library(ggdist)
library(ggside)
library(easystats)
library(patchwork)

illusion1 <- read.csv("../data/raw_illusion1.csv") |>
  mutate(
    Illusion_Effect = fct_relevel(as.factor(Illusion_Effect), "Incongruent", "Congruent"),
    Block = as.factor(Block)
  )

illusion2 <- read.csv("../data/raw_illusion2.csv") |>
  mutate(
    Illusion_Effect = fct_relevel(as.factor(Illusion_Effect), "Incongruent", "Congruent"),
    Block = as.factor(Block)
  )

perceptual <- read.csv("../data/raw_perceptual.csv") |>
  mutate(
    Block = as.factor(Block)
  )

sub <- read.csv("../data/raw_questionnaires.csv")

# unique(sub$Education)
#   mutate(
#     Education = fct_relevel(Education, "High School", "Bachelor", "Master", "Doctorate", "Other", "Prefer not to Say")
#   )

# For prolific:
# bayestestR::point_estimate(sub$Duration_Session1)
# plot(bayestestR::estimate_density(sub$Duration_Session1))

Outlier Detection (Tasks)

# Dear participant, thank you for participating in our study. Unfortunately, our system detected multiple issues in your data (such as implausibly short responses - in particular in the 2nd part of the study, random-like pattern of answers, or the same response to different scales - as some were designed to trigger various answers), which makes it unusable. We understand that you might have been in a hurry or had some other issues, and so we kindly ask you to return your participation; we hope to open-up more slots in the future would you be interested to participate again. 

# Dear participant, thank you for participating in our study. Unfortunately, our system detected multiple issues in your data (such as implausibly short responses - in particular in the 2nd part of the study, random-like pattern of answers, or the same response to different scales - as some were designed to trigger various answers), which makes it unusable for us. We understand that you might have been in a hurry or had some other issues; we hope to open-up more slots in the future would you be interested to participate again. 

outliers_perceptual <- c(
  "S003",
  "S008"
)
outliers_illusion1 <- c(
  "S008"
  )
outliers_illusion2 <- c(
  "S003"
)

We removed 1, 2, and 1 participants for the illusion task - session 1, perceptual task, and illusion task - session 2 respectively, upon inspection of the average error rage (when close to 50%, suggesting random answers) and/or when the reaction time distribution was implausibly fast.

Descriptive Table

data <- rbind(illusion1, illusion2, perceptual) |>
  filter(RT < 10) |>
  mutate(
    Participant = fct_rev(Participant),
    Task = fct_relevel(Task, "Illusion_Session1", "Perceptual", "Illusion_Session2")
  )

table <- data |>
  group_by(Participant, Task) |>
  summarize(
    Error = sum(Error) / n(),
    RT = mean(RT)
  ) |>
  ungroup() |>
  arrange(desc(Error)) |>
  tidyr::pivot_wider(names_from = "Task", values_from = c("Error", "RT"), names_vary = "slowest") |>
  datawizard::data_relocate(ends_with("Session2"), after = -1) |>
  arrange(desc(Error_Illusion_Session1))
data.frame(Participant = c("Average"), t(sapply(table[2:ncol(table)], mean, na.rm = TRUE))) |>
  rbind(table) |>
  knitr::kable() |>
  kableExtra::row_spec(1, italic = TRUE, background = "grey", color = "white") |>
  kableExtra::row_spec(which(table$Participant %in% c(outliers_perceptual, outliers_illusion1, outliers_illusion2)) + 1, background = "#EF9A9A") |>
  # kableExtra::column_spec(2, color="white",
  #                         background = kableExtra::spec_color(c(NA, table$Error_Illusion_Session1))) |>
  kableExtra::kable_styling(full_width = TRUE) |>
  kableExtra::scroll_box(width = "100%", height = "500px")
Participant Error_Perceptual RT_Perceptual Error_Illusion_Session1 RT_Illusion_Session1 RT_Illusion_Session2 Error_Illusion_Session2
Average 0.073 0.666 0.180 0.747 0.731 0.265
S008 0.500 0.347 0.417 0.610
S002 0.115 0.872 0.346 0.713 0.945 0.396
S023 0.125 0.619 0.346 0.631
S021 0.250 0.703 0.263 0.544
S059 0.021 0.488 0.260 0.542
S006 0.089 0.620 0.258 0.529 0.723 0.216
S053 0.151 0.475 0.245 0.511
S048 0.052 0.502 0.242 0.502
S025 0.073 0.598 0.237 0.713
S007 0.089 0.770 0.234 0.894
S036 0.068 0.749 0.232 0.845
S014 0.047 0.548 0.227 0.641
S019 0.052 0.496 0.224 0.618
S046 0.073 0.553 0.221 0.620
S042 0.161 0.493 0.221 0.560
S003 0.214 0.441 0.211 0.853 0.331 0.371
S043 0.057 0.823 0.208 0.688
S001 0.323 0.946 0.199 0.986
S052 0.031 0.541 0.195 0.537
S047 0.094 0.624 0.191 0.823
S026 0.036 0.732 0.182 0.934
S017 0.068 0.534 0.180 0.606
S040 0.026 0.829 0.180 0.929
S044 0.016 0.642 0.177 0.665
S035 0.099 0.497 0.177 0.523
S030 0.068 0.546 0.177 0.718
S013 0.062 0.563 0.177 0.706
S034 0.099 0.645 0.174 0.792
S022 0.078 0.500 0.174 0.601
S038 0.115 0.618 0.167 0.691
S033 0.083 0.698 0.167 0.752
S024 0.042 0.756 0.167 0.735
S020 0.073 0.833 0.167 0.820
S050 0.016 0.714 0.164 1.025
S009 0.010 0.633 0.164 0.833
S054 0.057 0.535 0.161 0.576
S039 0.083 0.517 0.161 0.524
S004 0.010 0.800 0.161 1.051
S011 0.042 0.657 0.159 0.755 0.687 0.185
S032 0.031 0.585 0.154 0.550
S058 0.036 0.596 0.146 0.609
S057 0.021 0.562 0.146 0.645
S045 0.010 0.912 0.141 0.730
S015 0.047 0.643 0.135 0.580
S060 0.042 0.695 0.130 0.947
S041 0.052 0.672 0.130 0.751
S051 0.016 0.569 0.128 0.753
S037 0.047 0.756 0.128 0.965
S018 0.052 0.682 0.128 0.816
S016 0.021 0.605 0.128 0.678
S061 0.021 0.593 0.126 1.281
S055 0.057 0.553 0.125 0.756
S031 0.016 0.669 0.125 0.780
S005 0.000 0.999 0.125 0.877
S010 0.068 1.439 0.122 1.152 0.968 0.159
S027 0.010 0.706 0.122 0.730
S028 0.052 0.775 0.117 0.887
S056 0.052 0.568 0.115 0.664
S049 0.042 0.990 0.096 0.968
S029 0.068 0.875 0.094 0.924
S012 0.016 0.699 0.094 0.928

Reaction Time Distribution

p <- data |>
  # filter(as.numeric(gsub("\\D", "", Participant)) >= 22) |>
  estimate_density(select = "RT", at = c("Participant", "Task", "Block")) |>
  group_by(Participant) |>
  normalize(select = "y") |>
  ungroup() |>
  mutate(
    # Participant = fct_relevel(Participant, as.character(table$Participant)),
    color = case_when(
      Participant %in% outliers_perceptual & Task == "Perceptual" ~ "red",
      Participant %in% outliers_illusion1 & Task == "Illusion_Session1" ~ "red",
      Participant %in% outliers_illusion2 & Task == "Illusion_Session2" ~ "red",
      TRUE ~ "blue"
    ),
    Task = fct_recode(Task,
      "Illusion task (session 1)" = "Illusion_Session1",
      "Illusion task (session 2)" = "Illusion_Session2",
      "Perceptual task" = "Perceptual"
    )
  ) |>
  ggplot(aes(x = x, y = y)) +
  geom_area(data = normalize(estimate_density(data, select = "RT"), select = "y"), alpha = 0.2) +
  geom_line(aes(color = color, group = interaction(Participant, Block), linetype = Block), size = 1) +
  geom_vline(xintercept = 0.125, linetype = "dashed", color = "red") +
  scale_color_manual(values = c("red" = "#F44336", "orange" = "#FF9800", "blue" = "blue"), guide = "none") +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  coord_cartesian(xlim = c(0, 2)) +
  theme_modern() +
  theme(axis.text.y = element_blank()) +
  facet_grid(Participant ~ Task) +
  labs(y = "", x = "Reaction Time (s)")
# p
ggsave("figures/outliers_RT.png", p, width = 10, height = 0.5*nrow(sub), dpi = 150)
knitr::include_graphics("figures/outliers_RT.png")

illusion1 <- filter(illusion1, !Participant %in% outliers_illusion1)
illusion2 <- filter(illusion2, !Participant %in% outliers_illusion2)
perceptual <- filter(perceptual, !Participant %in% outliers_perceptual)

Outliers Detection (Blocks)

For each block, we computed the error rate and, if more than 50%, we discarded the whole block (as it likely indicates that instructions got mixed up, for instance participants were selecting the smaller instead of the bigger circle).

data <- rbind(illusion1, illusion2, perceptual) |>
  group_by(Participant, Task, Illusion_Type, Block) |>
  summarize(ErrorRate_per_block = sum(Error) / n()) |>
  ungroup() |>
  arrange(desc(ErrorRate_per_block))


data |>
  estimate_density(at = c("Task", "Illusion_Type", "Block"), method = "KernSmooth") |>
  ggplot(aes(x = x, y = y)) +
  geom_line(aes(color = Illusion_Type, linetype = Block)) +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_color_manual(values = c("Ebbinghaus" = "#2196F3", "MullerLyer" = "#4CAF50", "VerticalHorizontal" = "#FF5722")) +
  labs(y = "Distribution", x = "Error Rate") +
  theme_modern() +
  facet_wrap(~Task)

remove_badblocks <- function(df) {
  n <- nrow(df)
  df <- df |>
    group_by(Participant, Illusion_Type, Block) |>
    mutate(ErrorRate_per_block = sum(Error) / n()) |>
    ungroup() |>
    filter(ErrorRate_per_block < 0.5) |>
    select(-ErrorRate_per_block)
  text <- paste0(
    "We removed ",
    n - nrow(df),
    " (",
    insight::format_value((n - nrow(df)) / n, as_percent = TRUE),
    ") trials belonging to bad blocks."
  )
  list(data = df, text = text)
}

out <- remove_badblocks(illusion1)
print(paste("Illusion (session 1):", out$text))

[1] “Illusion (session 1): We removed 128 (0.56%) trials belonging to bad blocks.”

illusion1 <- out$data

out <- remove_badblocks(illusion2)
print(paste("Illusion (session 2):", out$text))

[1] “Illusion (session 2): We removed 64 (4.17%) trials belonging to bad blocks.”

illusion2 <- out$data

out <- remove_badblocks(perceptual)
print(paste("Perceptual task:", out$text))

[1] “Perceptual task: We removed 96 (0.85%) trials belonging to bad blocks.”

perceptual <- out$data

Outliers Detection (Trials)

Reaction Time per Trial

check_trials <- function(df) {
  data <- df |>
    mutate(Outlier = ifelse(RT >= 10, TRUE, FALSE)) |>
    group_by(Participant) |>
    mutate(Outlier = ifelse(RT < 0.125 | standardize(RT, robust = TRUE) > 4, TRUE, Outlier)) |>
    ungroup()

  p1 <- data |>
    filter(RT < 10) |>
    estimate_density(select = "RT", at = "Participant") |>
    group_by(Participant) |>
    normalize(select = "y") |>
    ungroup() |>
    merge(data |>
      group_by(Participant) |>
      mutate(Threshold = median(RT) + 4 * mad(RT)) |>
      filter(Error == 0) |>
      summarize(Threshold = mean(Threshold))) |>
    mutate(Outlier = ifelse(x >= Threshold, TRUE, FALSE)) |>
    ggplot(aes(x = x, y = y)) +
    geom_area(data = normalize(estimate_density(filter(data, RT < 10), select = "RT"), select = "y"), alpha = 0.2) +
    geom_line(aes(color = Participant, linetype = Outlier), alpha = 0.2) +
    geom_vline(xintercept = c(125), linetype = "dashed", color = "red") +
    scale_color_material_d("rainbow", guide = "none") +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0)) +
    guides(linetype = "none") +
    coord_cartesian(xlim = c(0, 5)) +
    theme_modern() +
    theme(axis.text.y = element_blank()) +
    labs(y = "", x = "Reaction Time (s)")


  p2 <- data |>
    group_by(Participant) |>
    summarize(Outlier = sum(Outlier) / nrow(illusion1)) |>
    mutate(Participant = fct_reorder(Participant, Outlier)) |>
    ggplot(aes(x = Participant, y = Outlier)) +
    geom_bar(stat = "identity", aes(fill = Participant)) +
    scale_fill_material_d("rainbow", guide = "none") +
    scale_x_discrete(expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0), labels = scales::percent) +
    see::theme_modern() +
    theme(axis.text.x = element_blank()) +
    labs(y = "Percentage of outlier trials")

  text <- paste0(
    "We removed ",
    sum(data$Outlier),
    " (",
    insight::format_value(sum(data$Outlier) / nrow(data), as_percent = TRUE),
    ") outlier trials (125 ms < RT < 4 MAD above median)."
  )

  data <- filter(data, Outlier == FALSE)
  data$Outlier <- NULL

  list(p = p1 / p2, data = data, text = text)
}

Illusion Task (Session 1)

out <- check_trials(illusion1)
print(paste("Illusion (session 1):", out$text))

[1] “Illusion (session 1): We removed 780 (3.40%) outlier trials (125 ms < RT < 4 MAD above median).”

out$p

illusion1 <- out$data

Illusion Task (Session 2)

out <- check_trials(illusion2)
print(paste("Illusion (session 2):", out$text))

[1] “Illusion (session 2): We removed 49 (3.33%) outlier trials (125 ms < RT < 4 MAD above median).”

out$p

illusion2 <- out$data

Perceptual Task

out <- check_trials(perceptual)
print(paste("Perceptual task:", out$text))

[1] “Perceptual task: We removed 500 (4.45%) outlier trials (125 ms < RT < 4 MAD above median).”

out$p

perceptual <- out$data

Outlier Detection (Questionnaires)

Multivariate Detection

We compute a multivariate outlier score.

outliers <- sub |> 
  select(AttentionCheck_Session1, 
         IPIP6_RT, PID5_RT, GCBS_RT, ASQ_RT, LIE_RT, SPQ_RT, 
         IPIP6_SD, PID5_SD, PHQ4_SD) |> 
  standardize() |> 
  performance::check_outliers(method = c("mahalanobis", "mahalanobis_robust", "mcd", "ics"))

sub$Potential_Outliers <- as.data.frame(outliers)$Outlier
outliers
## 4 outliers detected: cases 52, 54, 56, 60.
## - Based on the following methods and thresholds: mahalanobis (29.59),
##   mahalanobis_robust (29.59), mcd (29.59), ics (0).
## - For variables: AttentionCheck_Session1, IPIP6_RT, PID5_RT, GCBS_RT,
##   ASQ_RT, LIE_RT, SPQ_RT, IPIP6_SD, PID5_SD, PHQ4_SD.
## 
## Note: Outliers were classified as such by at least half of the selected methods. 
## 
## -----------------------------------------------------------------------------
## The following observations were considered outliers for two or more variables 
## by at least one of the selected methods: 
## 
##    Row  n_Mahalanobis n_Mahalanobis_robust          n_MCD          n_ICS
## 1   52 (Multivariate)       (Multivariate) (Multivariate)              0
## 2   54 (Multivariate)       (Multivariate) (Multivariate) (Multivariate)
## 3   56 (Multivariate)       (Multivariate) (Multivariate)              0
## 4   60 (Multivariate)       (Multivariate) (Multivariate) (Multivariate)
## 5    8              0       (Multivariate) (Multivariate)              0
## 6   11              0       (Multivariate)              0              0
## 7   14              0       (Multivariate)              0              0
## 8   17              0       (Multivariate) (Multivariate)              0
## 9   18              0       (Multivariate) (Multivariate)              0
## 10  20              0       (Multivariate) (Multivariate)              0
## 11  29              0       (Multivariate) (Multivariate)              0
## 12  33              0       (Multivariate)              0              0
## 13  35              0       (Multivariate) (Multivariate)              0
## 14  41              0       (Multivariate) (Multivariate)              0
## 15  48              0       (Multivariate) (Multivariate)              0
## 16  50              0       (Multivariate) (Multivariate)              0
## 17  55              0       (Multivariate) (Multivariate)              0
## 18  57              0       (Multivariate) (Multivariate)              0
## 19  58              0       (Multivariate) (Multivariate)              0
## 20  31              0                    0 (Multivariate)              0

Manual Check

outliers_questionnaires <- c(
  "S003", "S008"
)

We removed the questionnaire data from 2 participants upon inspection of attention checks and time taken to complete each questionnaires.

table <- sub |>
  mutate(
    Outlier_Task1 = Participant %in% outliers_illusion1,
    Outlier_Task2 = Participant %in% outliers_illusion2,
    Outlier_Task3 = Participant %in% outliers_perceptual,
    Outlier_Tasks = Outlier_Task1 + Outlier_Task2 + Outlier_Task3
  ) |>
  select(
    Participant,
    Outlier_Tasks,
    Potential_Outliers,
    AttentionCheck_Session1,
    IPIP6_RT, PID5_RT, ASQ_RT, SPQ_RT,
    IPIP6_SD, PID5_SD, PHQ4_SD,
    AttentionCheck_Session2,
    BPD_RT, MAIA_RT, PI_RT,
    BPD_SD, MAIA_SD, PI_SD
  ) |> 
  # mutate(across(ends_with("IPIP6_RT") | ends_with("IPIP6_SD"), standardize)) |> 
  # arrange(desc(Outlier_Tasks), AttentionCheck_Session1) 
  arrange(desc(Participant))
t <- data.frame(Participant = c("Average"), t(sapply(table[2:ncol(table)], mean, na.rm = TRUE))) |>
  rbind(table) |>
  knitr::kable() |>
  kableExtra::row_spec(1, italic = TRUE) |>
  kableExtra::row_spec(which(table$Participant %in% outliers_questionnaires) + 1, background = "#EF9A9A")

for (i in 2:ncol(table)) {
  t <- kableExtra::column_spec(
    t, i,
    color = "white",
    background = kableExtra::spec_color(
      c(NA, table[[i]]),
      option = "D",
      alpha = 1,
      # direction = ifelse(str_detect(names(table)[i], "_SD|Outlier"), 1, -1),
      na_color = "white",
    )
  )
}


t  |>
  kableExtra::row_spec(1, background = "grey") |> 
  kableExtra::kable_styling(full_width = TRUE, font_size = 9) |>
  kableExtra::scroll_box(width = "100%", height = "500px")
Participant Outlier_Tasks Potential_Outliers AttentionCheck_Session1 IPIP6_RT PID5_RT ASQ_RT SPQ_RT IPIP6_SD PID5_SD PHQ4_SD AttentionCheck_Session2 BPD_RT MAIA_RT PI_RT BPD_SD MAIA_SD PI_SD
Average 0.066 0.172 0.958 1.974 2.607 2.604 3.29 0.194 0.658 0.400 0.656 1.052 2.99 3.55 0.241 0.129 0.701
S061 0.000 1.000 1.000 0.817 6.877 1.362 1.87 0.145 0.509 0.000
S060 0.000 0.000 0.964 1.567 2.094 2.395 3.00 0.210 0.509 0.354
S059 0.000 0.000 0.997 2.304 2.234 2.914 2.42 0.255 0.907 0.707
S058 0.000 0.000 1.000 2.149 3.804 2.734 4.26 0.209 0.647 1.414
S057 0.000 0.250 1.000 3.019 2.526 2.324 3.80 0.167 0.724 0.354
S056 0.000 0.000 1.000 1.477 1.661 2.534 2.42 0.181 0.577 1.061
S055 0.000 0.000 0.964 1.084 1.428 1.200 1.91 0.237 0.715 0.000
S054 0.000 0.000 0.857 2.392 3.006 2.951 3.40 0.158 0.597 0.707
S053 0.000 0.000 1.000 1.324 2.183 1.636 2.44 0.117 0.571 0.000
S052 0.000 0.000 1.000 1.873 2.227 2.108 2.54 0.140 0.537 0.354
S051 0.000 0.000 1.000 2.072 2.736 2.599 3.64 0.157 0.199 0.354
S050 0.000 0.000 1.000 1.454 2.037 2.662 2.68 0.216 0.460 0.000
S049 0.000 0.500 0.929 2.178 4.859 3.562 5.11 0.207 0.528 0.000
S048 0.000 0.000 1.000 1.377 1.914 2.253 2.99 0.196 0.537 0.354
S047 0.000 0.500 0.779 4.321 1.188 0.905 3.75 0.184 0.089 0.000
S046 0.000 0.000 1.000 1.099 1.136 1.422 1.60 0.100 0.666 0.707
S045 0.000 0.000 0.893 2.371 2.143 2.942 3.05 0.258 0.606 0.354
S044 0.000 0.000 0.964 1.625 2.098 2.295 3.22 0.170 0.699 0.354
S043 0.000 0.000 0.964 1.304 1.868 1.949 2.29 0.219 0.398 0.354
S042 0.000 0.000 1.000 1.204 1.659 1.917 2.41 0.160 0.545 1.061
S041 0.000 0.000 1.000 1.455 1.949 1.693 2.06 0.181 0.732 0.354
S040 0.000 0.000 1.000 1.422 1.965 1.220 1.95 0.121 0.687 0.707
S039 0.000 0.000 1.000 2.092 3.134 2.945 2.90 0.190 0.656 0.707
S038 0.000 0.500 0.964 3.351 6.988 3.231 6.15 0.196 0.687 0.354
S037 0.000 0.500 1.000 0.770 4.167 2.955 1.45 0.104 0.398 0.354
S036 0.000 0.000 0.996 1.325 1.921 1.766 2.58 0.226 0.728 0.354
S035 0.000 0.000 0.993 2.358 2.258 2.601 2.81 0.182 0.837 0.354
S034 0.000 0.000 1.000 2.966 3.338 4.372 5.71 0.182 0.617 0.354
S033 0.000 0.000 1.000 0.692 0.890 1.091 1.23 0.088 0.288 0.000
S032 0.000 0.000 1.000 1.131 1.565 2.121 2.21 0.148 0.736 0.000
S031 0.000 0.000 0.964 1.509 1.611 1.729 1.97 0.224 0.830 0.354
S030 0.000 0.500 0.776 3.296 2.679 4.596 4.46 0.222 0.948 0.000
S029 0.000 1.000 0.913 1.302 6.534 3.338 3.83 0.167 1.146 0.000
S028 0.000 0.000 0.964 1.741 3.268 2.695 3.60 0.172 0.585 0.707
S027 0.000 0.000 1.000 1.369 1.777 2.340 4.06 0.161 0.675 0.000
S026 0.000 0.500 1.000 4.696 5.658 5.382 6.23 0.173 0.790 0.354
S025 0.000 0.500 1.000 2.561 2.680 5.594 4.80 0.317 0.748 0.000
S024 0.000 0.000 1.000 2.115 3.116 3.465 3.90 0.182 0.835 1.414
S023 0.000 0.000 1.000 1.548 1.875 2.191 2.89 0.277 0.674 0.000
S022 0.000 0.000 1.000 1.121 1.435 1.802 2.78 0.217 0.610 0.000
S021 0.000 0.750 1.000 4.232 3.404 6.555 7.42 0.484 0.819 1.061
S020 0.000 0.250 0.860 1.631 2.575 2.375 3.25 0.256 1.017 1.414
S019 0.000 0.250 0.893 1.980 2.789 3.059 5.71 0.205 0.744 0.354
S018 0.000 0.000 1.000 1.658 2.039 2.027 2.61 0.146 0.656 1.061
S017 0.000 0.000 0.964 1.299 1.681 1.911 2.06 0.201 0.937 0.354
S016 0.000 0.500 0.964 4.064 5.145 3.684 5.76 0.076 0.551 0.354
S015 0.000 0.000 1.000 1.045 1.275 1.418 2.60 0.292 0.713 0.354
S014 0.000 0.000 1.000 0.818 1.350 1.324 1.52 0.225 0.467 0.354
S013 0.000 0.000 1.000 1.071 1.270 1.418 2.42 0.093 0.710 0.000
S012 0.000 0.500 0.821 3.183 5.186 5.265 5.37 0.193 0.577 0.354
S011 0.000 0.500 1.000 1.871 2.802 2.380 3.38 0.155 0.814 0.354 0.667 1.188 3.28 3.53 0.313 0.130 0.769
S010 0.000 0.250 0.964 1.848 2.864 2.614 3.65 0.263 1.243 0.000 0.667 1.624 4.46 5.13 0.307 0.138 0.737
S009 0.000 0.000 1.000 1.347 2.064 2.250 2.51 0.296 0.635 0.354
S008 2.000 0.500 0.762 3.125 0.986 1.307 1.29 0.024 0.000 0.000
S007 0.000 0.000 0.929 1.095 1.324 1.792 2.00 0.211 0.702 0.707
S006 0.000 0.000 1.000 1.950 2.418 2.383 3.76 0.290 0.960 0.354 0.667 0.930 3.35 3.41 0.324 0.131 0.556
S005 0.000 0.000 1.000 1.828 2.952 2.998 3.55 0.163 0.577 0.707
S004 0.000 0.500 0.893 5.980 2.109 2.576 3.29 0.187 0.713 0.354
S003 2.000 0.000 0.857 0.955 1.375 2.160 1.96 0.216 0.847 0.000 0.613 0.719 1.59 3.71 0.102 0.098 0.819
S002 0.000 0.000 0.917 1.690 2.410 2.997 2.71 0.254 0.571 0.354 0.667 0.798 2.27 1.96 0.159 0.146 0.624
S001 0.000 0.750 0.728 1.912 2.523 4.542 7.27 0.179 0.659 0.707

# Inspection: select(sub[sub$Participant == "S008", ], starts_with("Item_PID"))
sub[
  sub$Participant %in% outliers_questionnaires,
  names(sub)[!names(sub) %in% c(
    "Participant", "Nationality", "Age",
    "Ethnicity", "Sex", "Student", "Education",
    "Interval", "AttentionCheck_Session1",
    "AttentionCheck_Session2"
  )]
] <- NA

Final Sample

We collected data from 61 participants.

illusion1 <- illusion1[!illusion1$Participant %in% outliers_illusion1, ]
illusion2 <- illusion2[!illusion2$Participant %in% outliers_illusion1, ]
perceptual <- perceptual[!perceptual$Participant %in% outliers_illusion1, ]
sub <- sub[!sub$Participant %in% outliers_illusion1, ]

The final sample included 60 participants (Mean age = 28.3, SD = 8.8, range: [20, 54]; Sex: 48.3% females, 51.7% males, 0.0% other; Education: High school, 33.33%; University (bachelor), 48.33%; University (doctorate), 1.67%; University (master), 16.67%), from which 2 (3.33%) completed session 2.

Country of Origin

select(sub, region = Nationality) |>
  group_by(region) |>
  summarize(n = n()) |>
  right_join(map_data("world"), by = "region") |>
  ggplot(aes(long, lat, group = group)) +
  geom_polygon(aes(fill = n)) +
  scale_fill_gradientn(colors = c("#FFEB3B", "red")) +
  theme_void() +
  ggtitle("Number of participants by country of origin")

Age

estimate_density(sub$Age) |>
  ggplot(aes(x = x, y = y)) +
  geom_area(fill = "#607D8B") +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(title = "Age", y = "Distribution", color = NULL) +
  theme_modern(axis.title.space = 10) +
  theme(
    plot.title = element_text(size = rel(1), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(face = "italic", hjust = 0.5),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = rel(0.8)),
    axis.title.x = element_blank()
  )

Ethnicity

plot_waffle <- function(sub, what = "Nationality", title = what, rows = 8, size = 3) {
  ggwaffle::waffle_iron(sub, what, rows = rows) |>
    ggplot(aes(x, y)) +
    geom_point(aes(color = group), shape = "square", size = size) +
    coord_equal() +
    ggtitle(title) +
    labs(fill = "", color = "") +
    theme_void() +
    theme(
      plot.title = element_text(face = "bold", hjust = 0.5),
      legend.key.height = unit(1, "mm"),
      legend.key.width = unit(1, "mm")
    )
}

plot_waffle(sub, "Ethnicity", rows = 10, size = 5) +
  scale_color_manual(values = c("Hispanic" = "#FF5722", "Caucasian" = "#2196F3", "African" = "#4CAF50", "Other" = "#795548"))

Education

sub |>
  ggplot(aes(x = Education)) +
  geom_bar(aes(fill = Education)) +
  scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
  scale_fill_viridis_d(guide = "none") +
  labs(title = "Education", y = "Number of Participants") +
  theme_modern(axis.title.space = 15) +
  theme(
    plot.title = element_text(size = rel(1), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(face = "italic", hjust = 0.5),
    axis.text.y = element_text(size = rel(0.8)),
    axis.text.x = element_text(size = rel(0.8)),
    axis.title.x = element_blank()
  )

Save Preprocessed

write.csv(illusion1, "../data/preprocessed_illusion1.csv", row.names = FALSE)
write.csv(illusion2, "../data/preprocessed_illusion2.csv", row.names = FALSE)
write.csv(perceptual, "../data/preprocessed_perceptual.csv", row.names = FALSE)
write.csv(sub, "../data/preprocessed_questionnaires.csv", row.names = FALSE)